# Suppress warnings up front so library deprecation notices don't
# clutter the notebook output.
import warnings
warnings.filterwarnings('ignore')
# Library imports
import pandas as pd
import numpy as np
import keras
import tensorflow as tf
from numpy import array
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.preprocessing.sequence import TimeseriesGenerator
import os
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import optuna
from optuna import Trial
from sklearn import metrics
from keras.callbacks import EarlyStopping,ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler,StandardScaler
# Route pandas-datareader downloads through yfinance (pdr_override patches
# pandas-datareader's Yahoo backend); the Excel files below are the actual
# data source in this script.
from pandas_datareader import data as pdr
import yfinance as yfin
yfin.pdr_override()
def _load_returns(path):
    """Read a smoothed-returns sheet indexed by Date and strip the
    '.ReturnSuavizado' suffix from every column name."""
    df = pd.read_excel(path, index_col='Date')
    df.columns = [c.replace('.ReturnSuavizado', '') for c in df.columns]
    return df

# The same read-and-rename logic was repeated three times; factored out.
data_train = _load_returns('dftrain.xlsx')
data_val = _load_returns('dftest.xlsx')
data_test = _load_returns('dftestall.xlsx')

# Train + validation combined, used later to refit the final model.
data_trainall = pd.concat([data_train, data_val])
data_trainall
| GSPC | AAPL | AMZN | MSTF | TSLA | GOOG | GOOGL | NVDA | BRK.B | META | UNH | JNJ | PG | VIX | DolarIndex | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||
| 2012-05-21 | 1.746909 | 3.490283 | 1.070000 | 1.075062 | 1.356247 | 1.510118 | 1.493379 | 0.655306 | 1.049199 | 1.510658 | 2.073412 | 0.155280 | -0.270027 | -1.810652 | -0.199267 |
| 2012-05-22 | -0.007725 | -0.534660 | -0.767095 | -0.020680 | 2.180645 | -1.547496 | -1.525078 | -0.556728 | -0.228852 | 1.510658 | 0.172248 | 0.036174 | -0.458769 | 0.327256 | 0.020139 |
| 2012-05-23 | 0.129489 | 1.455819 | 0.465606 | -1.562769 | 0.184059 | 0.941975 | 0.932516 | 0.950053 | 0.077185 | 1.510658 | -0.540879 | -0.473359 | -1.358408 | -0.062811 | 2.697792 |
| 2012-05-24 | 0.093857 | -0.629651 | -0.575519 | -0.138388 | -0.838114 | -0.698745 | -0.687197 | -1.156537 | 0.015914 | 1.507333 | 1.096323 | 0.705074 | 0.263350 | -0.473912 | -0.331184 |
| 2012-05-25 | -0.309696 | -0.388628 | -0.662707 | -0.067466 | -0.560168 | -1.436628 | -1.415629 | 0.919416 | -0.720151 | -1.709279 | -0.189339 | -1.060502 | -0.186788 | 0.173503 | 0.474968 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2023-01-25 | -0.083719 | -0.347177 | 0.458486 | -0.451382 | 0.073811 | -1.781387 | -1.782862 | 0.071015 | 0.142652 | -0.592635 | 0.080526 | 0.716622 | -0.124685 | -0.056849 | -0.374998 |
| 2023-01-26 | 1.182492 | 0.866959 | 1.129257 | 2.037044 | 3.353924 | 1.663595 | 1.580865 | 0.953151 | -0.342923 | 1.920064 | -0.059264 | -0.443317 | -0.484419 | -0.228505 | 0.105223 |
| 2023-01-27 | 0.220445 | 0.797927 | 1.648946 | 0.000575 | 3.363623 | 1.024389 | 1.236573 | 1.098853 | -0.605462 | 1.410943 | -1.038815 | -0.470380 | -0.493864 | -0.134595 | -0.300968 |
| 2023-01-30 | -1.548164 | -1.321312 | -0.983807 | -1.571251 | -2.182643 | -1.951333 | -1.718173 | -2.552922 | -0.627754 | -1.555253 | -0.096230 | 0.210523 | 0.289765 | 1.074298 | -0.181023 |
| 2023-01-31 | 1.590772 | 0.509451 | 1.386626 | 1.386587 | 1.213647 | 1.292552 | 1.278452 | 0.743473 | 1.276171 | 0.594704 | 1.978149 | 0.891425 | 0.997751 | -0.353844 | 0.192047 |
2792 rows × 15 columns
# Partial autocorrelation of the differenced GSPC series: a visual check
# of how many lags carry predictive signal.
datapacf = data_train.GSPC.diff().iloc[1:]

import matplotlib as mpl
from matplotlib import pyplot
from statsmodels.graphics.tsaplots import plot_pacf

with mpl.rc_context():
    mpl.rc("figure", figsize=(10, 5))
    plot_pacf(datapacf, lags=50)
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise Pearson correlations across all smoothed-return series.
plt.figure(figsize=(16, 6))
ax = sns.heatmap(data_train.corr(), vmin=-1, vmax=1, annot=True)
ax.set_title('Correlation Heatmap', fontdict={'fontsize': 12}, pad=12)
Text(0.5, 1.0, 'Correlation Heatmap')
# Keep the target (GSPC) plus the three retained regressors; apply the
# same selection to every split so their schemas stay aligned.
kept_columns = ['GSPC', 'MSTF', 'BRK.B', 'DolarIndex']
data_train = data_train[kept_columns]
data_val = data_val[kept_columns]
data_test = data_test[kept_columns]
data_trainall = data_trainall[kept_columns]
def shift_data(df, serie_name, period):
    """Append `period` lagged copies of column `serie_name` to `df` in place.

    New columns are named '<serie_name>.<lag>' (lag = 1..period) and are
    appended at the end of the frame; the first `period` rows of each new
    column are NaN.
    """
    base = df[serie_name]
    for lag in range(1, period + 1):
        df.insert(len(df.columns), f'{serie_name}.{lag}', base.shift(periods=lag))
def _add_lags(df, period=5):
    """Append `period` lags of every original column via shift_data, then
    drop the warm-up rows whose lags are NaN. Returns the trimmed frame."""
    # Snapshot the column list first: shift_data appends columns while we loop.
    for col in list(df.columns):
        shift_data(df, col, period)
    return df.dropna()

# The capture-loop-dropna sequence was copy-pasted four times; factored out.
data_train = _add_lags(data_train)
data_val = _add_lags(data_val)
data_test = _add_lags(data_test)
data_trainall = _add_lags(data_trainall)
def _scaled_frame(fitted, frame):
    """Apply an already-fitted scaler, preserving column labels."""
    return pd.DataFrame(fitted.transform(frame), columns=frame.columns)

# Scaler fitted on the train window only; validation is transformed with
# the same parameters (no leakage from val into the fit).
scaler = MinMaxScaler()
scaler.fit(data_train)
train_scaled = _scaled_frame(scaler, data_train)
val_scaled = _scaled_frame(scaler, data_val)

# A second scaler fitted on train+val, used for the final refit / test.
scaler_all = MinMaxScaler()
scaler_all.fit(data_trainall)
trainall_scaled = _scaled_frame(scaler_all, data_trainall)
test_scaled = _scaled_frame(scaler_all, data_test)

# Column 0 is the GSPC target; everything else is a feature.
xtrain, ytrain = train_scaled.iloc[:, 1:], train_scaled.iloc[:, 0]
xval, yval = val_scaled.iloc[:, 1:], val_scaled.iloc[:, 0]
xtrainall, ytrainall = trainall_scaled.iloc[:, 1:], trainall_scaled.iloc[:, 0]
xtest, ytest = test_scaled.iloc[:, 1:], test_scaled.iloc[:, 0]
# Row counts per split after lagging and dropna.
for features in (xtrain, xval, xtest, xtrainall):
    print(features.shape[0])
2767 15 15 2787
# Capture each split's Date index for use as the x-axis in later plots.
# NOTE(review): traindates/traindatesall are filtered again by the outlier
# mask below, after which they no longer align with the unfiltered frames.
traindates=data_train.index
testdates=data_test.index
valdates=data_val.index
traindatesall=data_trainall.index
from sklearn.neighbors import LocalOutlierFactor

# Drop rows LocalOutlierFactor flags as outliers (label -1), keeping the
# date index aligned with the surviving rows.
lof = LocalOutlierFactor()
mask = lof.fit_predict(xtrain) != -1
xtrain_no, ytrain_no, traindates = xtrain[mask], ytrain[mask], traindates[mask]

# Same filtering for the train+val set used in the final refit.
# BUG FIX: the original assigned the masked train-all dates to `traindates`,
# clobbering the train-only dates; they belong in `traindatesall`.
lof = LocalOutlierFactor()
mask = lof.fit_predict(xtrainall) != -1
xtrainall_no, ytrainall_no, traindatesall = (
    xtrainall[mask], ytrainall[mask], traindatesall[mask])
def _as_lstm_input(frame, width):
    """Reshape to (samples, timesteps=1, features=width) for the LSTM."""
    return frame.to_numpy().reshape(-1, 1, width)

# Targets have one feature; inputs have all columns minus the target.
ytrain_n = _as_lstm_input(ytrain_no, 1)
xtrain_n = _as_lstm_input(xtrain_no, data_train.shape[1] - 1)
yval_n = _as_lstm_input(yval, 1)
xval_n = _as_lstm_input(xval, data_val.shape[1] - 1)
ytest_n = _as_lstm_input(ytest, 1)
xtest_n = _as_lstm_input(xtest, data_test.shape[1] - 1)
ytrainall_n = _as_lstm_input(ytrainall_no, 1)
xtrainall_n = _as_lstm_input(xtrainall_no, data_trainall.shape[1] - 1)
import random

seed = 128

def random_seed(value):
    """Seed the python, numpy and tensorflow RNGs for reproducibility."""
    random.seed(value)
    np.random.seed(value)
    tf.random.set_seed(value)

random_seed(seed)
def objective(trial):
    """Optuna objective: build and train a stacked LSTM from trial params.

    Samples the layer count, per-layer width/activation/dropout, the final
    activation, the ReduceLROnPlateau schedule and the optimizer, trains
    for 50 epochs against the validation split, and returns the minimum
    validation MSE observed.
    """
    keras.backend.clear_session()
    n_layers = trial.suggest_int('n_layers', 1, 4)
    model = keras.Sequential()
    for i in range(n_layers):
        # Width is sampled log-uniformly between the feature count and 400.
        num_hidden = trial.suggest_int(f'n_units_l{i}',
                                       data_train.shape[1] - 1, 400, log=True)
        model.add(keras.layers.LSTM(
            num_hidden,
            input_shape=(1, data_train.shape[1] - 1),
            return_sequences=True,
            activation=trial.suggest_categorical(
                f'activation{i}', ['relu', 'linear', 'swish', 'sigmoid'])))
        model.add(keras.layers.Dropout(
            rate=trial.suggest_float(f'dropout{i}', 0.0, 0.5)))
    # Idiom fix: 'finalact' and 'optimizer' were f-strings with no
    # placeholders; plain literals are equivalent and clearer.
    model.add(keras.layers.Dense(1, activation=trial.suggest_categorical(
        'finalact', ['relu', 'linear', 'swish', 'sigmoid'])))
    val_ds = (xval_n, yval_n)
    reduce_lr = ReduceLROnPlateau(
        monitor='val_loss',
        factor=trial.suggest_float('LRfactor', 0.0, 0.5),
        patience=trial.suggest_int('LRpatience', 5, 20),
        min_lr=1e-05, verbose=0)
    model.compile(loss='mse', optimizer=trial.suggest_categorical(
        'optimizer', ['Adagrad', 'adam', 'sgd', 'RMSprop']))
    run_history = model.fit(xtrain_n, ytrain_n, validation_data=val_ds,
                            epochs=50, callbacks=[reduce_lr], verbose=0)
    return min(run_history.history['val_loss'])
# Minimise validation loss over 30 trials, capped at 30 minutes.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=30, timeout=1800)

print(f"Number of finished trials: {len(study.trials)}")
print("Best trial:")
trial = study.best_trial
print(f" Value: {trial.value}")
[I 2023-05-04 21:39:04,328] A new study created in memory with name: no-name-2f726e47-3c77-4804-8b9a-7d9d1ced5527 [I 2023-05-04 21:39:33,936] Trial 0 finished with value: 0.03469480574131012 and parameters: {'n_layers': 2, 'n_units_l0': 63, 'activation0': 'sigmoid', 'dropout0': 0.4497758295784005, 'n_units_l1': 36, 'activation1': 'swish', 'dropout1': 0.45494398736702957, 'finalact': 'relu', 'LRfactor': 0.3385966726258443, 'LRpatience': 20, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012. [I 2023-05-04 21:40:25,939] Trial 1 finished with value: 0.22325070202350616 and parameters: {'n_layers': 3, 'n_units_l0': 210, 'activation0': 'linear', 'dropout0': 0.08908484293328278, 'n_units_l1': 58, 'activation1': 'relu', 'dropout1': 0.3607393324724844, 'n_units_l2': 64, 'activation2': 'swish', 'dropout2': 0.050528786668444625, 'finalact': 'swish', 'LRfactor': 0.4261282763954562, 'LRpatience': 7, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012. [I 2023-05-04 21:41:09,290] Trial 2 finished with value: 0.11625661700963974 and parameters: {'n_layers': 4, 'n_units_l0': 27, 'activation0': 'linear', 'dropout0': 0.32081556059981314, 'n_units_l1': 48, 'activation1': 'linear', 'dropout1': 0.4271050399523469, 'n_units_l2': 56, 'activation2': 'relu', 'dropout2': 0.1350346668384107, 'n_units_l3': 23, 'activation3': 'relu', 'dropout3': 0.07419599259274368, 'finalact': 'relu', 'LRfactor': 0.06626474452211745, 'LRpatience': 11, 'optimizer': 'Adagrad'}. Best is trial 0 with value: 0.03469480574131012. [I 2023-05-04 21:42:03,526] Trial 3 finished with value: 0.0024616126902401447 and parameters: {'n_layers': 2, 'n_units_l0': 119, 'activation0': 'linear', 'dropout0': 0.4666291475397445, 'n_units_l1': 321, 'activation1': 'linear', 'dropout1': 0.23297453653473893, 'finalact': 'linear', 'LRfactor': 0.3716491551697835, 'LRpatience': 16, 'optimizer': 'adam'}. Best is trial 3 with value: 0.0024616126902401447. 
[I 2023-05-04 21:42:50,352] Trial 4 finished with value: 0.028451677411794662 and parameters: {'n_layers': 2, 'n_units_l0': 228, 'activation0': 'sigmoid', 'dropout0': 0.2936512952485221, 'n_units_l1': 83, 'activation1': 'swish', 'dropout1': 0.4866080184224619, 'finalact': 'relu', 'LRfactor': 0.21004610191077522, 'LRpatience': 13, 'optimizer': 'Adagrad'}. Best is trial 3 with value: 0.0024616126902401447. [I 2023-05-04 21:43:56,081] Trial 5 finished with value: 0.002349897287786007 and parameters: {'n_layers': 4, 'n_units_l0': 278, 'activation0': 'linear', 'dropout0': 0.19204236108062905, 'n_units_l1': 23, 'activation1': 'sigmoid', 'dropout1': 0.4787275890234401, 'n_units_l2': 270, 'activation2': 'swish', 'dropout2': 0.25183272134384543, 'n_units_l3': 75, 'activation3': 'relu', 'dropout3': 0.07401573027113578, 'finalact': 'swish', 'LRfactor': 0.30819563778768044, 'LRpatience': 19, 'optimizer': 'adam'}. Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:44:57,951] Trial 6 finished with value: 0.027446050196886063 and parameters: {'n_layers': 3, 'n_units_l0': 98, 'activation0': 'swish', 'dropout0': 0.11033841508715692, 'n_units_l1': 266, 'activation1': 'sigmoid', 'dropout1': 0.007128239202319531, 'n_units_l2': 233, 'activation2': 'sigmoid', 'dropout2': 0.18063155986861595, 'finalact': 'swish', 'LRfactor': 0.4353618787241985, 'LRpatience': 18, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:46:07,778] Trial 7 finished with value: 0.025309884920716286 and parameters: {'n_layers': 4, 'n_units_l0': 86, 'activation0': 'relu', 'dropout0': 0.13469944325185834, 'n_units_l1': 51, 'activation1': 'sigmoid', 'dropout1': 0.45407596417766094, 'n_units_l2': 233, 'activation2': 'relu', 'dropout2': 0.1939623726401456, 'n_units_l3': 137, 'activation3': 'swish', 'dropout3': 0.001329233805416219, 'finalact': 'swish', 'LRfactor': 0.39399612905476733, 'LRpatience': 17, 'optimizer': 'RMSprop'}. 
Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:47:12,579] Trial 8 finished with value: 0.02543598599731922 and parameters: {'n_layers': 3, 'n_units_l0': 184, 'activation0': 'swish', 'dropout0': 0.03378915321380127, 'n_units_l1': 159, 'activation1': 'sigmoid', 'dropout1': 0.14886044336657522, 'n_units_l2': 107, 'activation2': 'linear', 'dropout2': 0.1958553785729048, 'finalact': 'sigmoid', 'LRfactor': 0.25626449401087686, 'LRpatience': 7, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:47:31,867] Trial 9 finished with value: 0.02460320107638836 and parameters: {'n_layers': 1, 'n_units_l0': 35, 'activation0': 'relu', 'dropout0': 0.22335975890434528, 'finalact': 'linear', 'LRfactor': 0.40034922645562904, 'LRpatience': 19, 'optimizer': 'Adagrad'}. Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:49:10,367] Trial 10 finished with value: 0.026280028745532036 and parameters: {'n_layers': 4, 'n_units_l0': 386, 'activation0': 'linear', 'dropout0': 0.1985833543391322, 'n_units_l1': 23, 'activation1': 'relu', 'dropout1': 0.3294185031066814, 'n_units_l2': 379, 'activation2': 'swish', 'dropout2': 0.39491531481846653, 'n_units_l3': 237, 'activation3': 'linear', 'dropout3': 0.41822073147160505, 'finalact': 'sigmoid', 'LRfactor': 0.4969425578807015, 'LRpatience': 13, 'optimizer': 'sgd'}. Best is trial 5 with value: 0.002349897287786007. [I 2023-05-04 21:49:53,861] Trial 11 finished with value: 0.00242361961863935 and parameters: {'n_layers': 1, 'n_units_l0': 370, 'activation0': 'linear', 'dropout0': 0.4936815127364292, 'finalact': 'linear', 'LRfactor': 0.3025365406445617, 'LRpatience': 16, 'optimizer': 'adam'}. Best is trial 5 with value: 0.002349897287786007. 
[I 2023-05-04 21:50:38,290] Trial 12 finished with value: 0.0022341052535921335 and parameters: {'n_layers': 1, 'n_units_l0': 385, 'activation0': 'linear', 'dropout0': 0.3748601955310189, 'finalact': 'linear', 'LRfactor': 0.2812289816819257, 'LRpatience': 15, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:51:15,565] Trial 13 finished with value: 0.0023731838446110487 and parameters: {'n_layers': 1, 'n_units_l0': 297, 'activation0': 'linear', 'dropout0': 0.3726751558901054, 'finalact': 'linear', 'LRfactor': 0.21513406150227118, 'LRpatience': 15, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:52:12,785] Trial 14 finished with value: 0.0030617346055805683 and parameters: {'n_layers': 3, 'n_units_l0': 390, 'activation0': 'linear', 'dropout0': 0.37645032484453356, 'n_units_l1': 24, 'activation1': 'sigmoid', 'dropout1': 0.3402940035489825, 'n_units_l2': 26, 'activation2': 'swish', 'dropout2': 0.3473589864184872, 'finalact': 'swish', 'LRfactor': 0.27683807897904333, 'LRpatience': 11, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:52:52,475] Trial 15 finished with value: 0.002801797352731228 and parameters: {'n_layers': 2, 'n_units_l0': 263, 'activation0': 'linear', 'dropout0': 0.25316446423339506, 'n_units_l1': 106, 'activation1': 'sigmoid', 'dropout1': 0.4997830055487583, 'finalact': 'linear', 'LRfactor': 0.21203124146168495, 'LRpatience': 20, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:53:17,713] Trial 16 finished with value: 0.02614479511976242 and parameters: {'n_layers': 1, 'n_units_l0': 162, 'activation0': 'sigmoid', 'dropout0': 0.19644192905300228, 'finalact': 'swish', 'LRfactor': 0.1433997174995662, 'LRpatience': 5, 'optimizer': 'sgd'}. Best is trial 12 with value: 0.0022341052535921335. 
[I 2023-05-04 21:54:12,643] Trial 17 finished with value: 0.025485746562480927 and parameters: {'n_layers': 4, 'n_units_l0': 259, 'activation0': 'relu', 'dropout0': 0.39136264370980867, 'n_units_l1': 34, 'activation1': 'linear', 'dropout1': 0.26290349477033526, 'n_units_l2': 145, 'activation2': 'linear', 'dropout2': 0.4920459106277286, 'n_units_l3': 61, 'activation3': 'sigmoid', 'dropout3': 0.20011530860455395, 'finalact': 'sigmoid', 'LRfactor': 0.3225318885145574, 'LRpatience': 14, 'optimizer': 'RMSprop'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:54:58,154] Trial 18 finished with value: 0.0024991645477712154 and parameters: {'n_layers': 2, 'n_units_l0': 150, 'activation0': 'swish', 'dropout0': 0.28992015279803035, 'n_units_l1': 81, 'activation1': 'relu', 'dropout1': 0.41806921128574387, 'finalact': 'linear', 'LRfactor': 0.3357577341446319, 'LRpatience': 18, 'optimizer': 'adam'}. Best is trial 12 with value: 0.0022341052535921335. [I 2023-05-04 21:56:28,899] Trial 19 finished with value: 0.0022323420271277428 and parameters: {'n_layers': 3, 'n_units_l0': 309, 'activation0': 'linear', 'dropout0': 0.16142889562535626, 'n_units_l1': 138, 'activation1': 'swish', 'dropout1': 0.4017422879433487, 'n_units_l2': 398, 'activation2': 'sigmoid', 'dropout2': 0.30006760453802533, 'finalact': 'swish', 'LRfactor': 0.26461943339709043, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 19 with value: 0.0022323420271277428. [I 2023-05-04 21:58:12,820] Trial 20 finished with value: 0.002155124209821224 and parameters: {'n_layers': 3, 'n_units_l0': 197, 'activation0': 'linear', 'dropout0': 0.14939677163716047, 'n_units_l1': 193, 'activation1': 'swish', 'dropout1': 0.40035016781108546, 'n_units_l2': 376, 'activation2': 'sigmoid', 'dropout2': 0.29387234491389896, 'finalact': 'linear', 'LRfactor': 0.16202265241592126, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. 
[I 2023-05-04 21:59:53,432] Trial 21 finished with value: 0.002449318766593933 and parameters: {'n_layers': 3, 'n_units_l0': 323, 'activation0': 'linear', 'dropout0': 0.15681136752530977, 'n_units_l1': 194, 'activation1': 'swish', 'dropout1': 0.3908307192158927, 'n_units_l2': 353, 'activation2': 'sigmoid', 'dropout2': 0.292021819408258, 'finalact': 'linear', 'LRfactor': 0.1532165786804657, 'LRpatience': 10, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:01:32,500] Trial 22 finished with value: 0.002280886285007 and parameters: {'n_layers': 3, 'n_units_l0': 221, 'activation0': 'linear', 'dropout0': 0.067689589578532, 'n_units_l1': 396, 'activation1': 'swish', 'dropout1': 0.3833348197002681, 'n_units_l2': 168, 'activation2': 'sigmoid', 'dropout2': 0.308862505529691, 'finalact': 'linear', 'LRfactor': 0.25650878087315787, 'LRpatience': 9, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:02:31,296] Trial 23 finished with value: 0.002222359413281083 and parameters: {'n_layers': 3, 'n_units_l0': 309, 'activation0': 'linear', 'dropout0': 0.15449851362758502, 'n_units_l1': 142, 'activation1': 'swish', 'dropout1': 0.3142397484419011, 'n_units_l2': 330, 'activation2': 'sigmoid', 'dropout2': 0.37650755745813574, 'finalact': 'linear', 'LRfactor': 0.006563801861941221, 'LRpatience': 8, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:03:01,315] Trial 24 finished with value: 0.025332503020763397 and parameters: {'n_layers': 3, 'n_units_l0': 188, 'activation0': 'linear', 'dropout0': 0.15160521525766119, 'n_units_l1': 156, 'activation1': 'swish', 'dropout1': 0.2961503996956677, 'n_units_l2': 371, 'activation2': 'sigmoid', 'dropout2': 0.3667782295941331, 'finalact': 'linear', 'LRfactor': 0.0012695372904482083, 'LRpatience': 8, 'optimizer': 'sgd'}. Best is trial 20 with value: 0.002155124209821224. 
[I 2023-05-04 22:03:40,157] Trial 25 finished with value: 0.025236334651708603 and parameters: {'n_layers': 3, 'n_units_l0': 300, 'activation0': 'linear', 'dropout0': 0.052550274581807846, 'n_units_l1': 122, 'activation1': 'swish', 'dropout1': 0.4019088025292165, 'n_units_l2': 398, 'activation2': 'sigmoid', 'dropout2': 0.4161923253158957, 'finalact': 'swish', 'LRfactor': 0.09527728529910556, 'LRpatience': 5, 'optimizer': 'RMSprop'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:04:17,147] Trial 26 finished with value: 0.0021845053415745497 and parameters: {'n_layers': 3, 'n_units_l0': 237, 'activation0': 'relu', 'dropout0': 0.016016209903262885, 'n_units_l1': 218, 'activation1': 'swish', 'dropout1': 0.33563232437318946, 'n_units_l2': 261, 'activation2': 'sigmoid', 'dropout2': 0.29746006798122976, 'finalact': 'sigmoid', 'LRfactor': 0.02577697345885166, 'LRpatience': 11, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:04:39,006] Trial 27 finished with value: 0.0022640665993094444 and parameters: {'n_layers': 2, 'n_units_l0': 158, 'activation0': 'relu', 'dropout0': 0.024388311603342677, 'n_units_l1': 215, 'activation1': 'swish', 'dropout1': 0.3206358320397621, 'finalact': 'sigmoid', 'LRfactor': 0.00673317775445606, 'LRpatience': 12, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. [I 2023-05-04 22:05:14,122] Trial 28 finished with value: 0.0022593578323721886 and parameters: {'n_layers': 3, 'n_units_l0': 236, 'activation0': 'relu', 'dropout0': 0.02005084880934265, 'n_units_l1': 215, 'activation1': 'swish', 'dropout1': 0.3558095675348199, 'n_units_l2': 249, 'activation2': 'sigmoid', 'dropout2': 0.3320202293339337, 'finalact': 'sigmoid', 'LRfactor': 0.04551583786492822, 'LRpatience': 8, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224. 
[I 2023-05-04 22:05:34,064] Trial 29 finished with value: 0.0027342387475073338 and parameters: {'n_layers': 2, 'n_units_l0': 137, 'activation0': 'sigmoid', 'dropout0': 0.004848985915667459, 'n_units_l1': 158, 'activation1': 'swish', 'dropout1': 0.2775840109641383, 'finalact': 'relu', 'LRfactor': 0.0352036875097686, 'LRpatience': 9, 'optimizer': 'adam'}. Best is trial 20 with value: 0.002155124209821224.
Number of finished trials: 30 Best trial: Value: 0.002155124209821224
# Dump the winning hyper-parameters.
print(" Params: ")
for name, setting in trial.params.items():
    print(f" {name}: {setting}")
Params:
n_layers: 3
n_units_l0: 197
activation0: linear
dropout0: 0.14939677163716047
n_units_l1: 193
activation1: swish
dropout1: 0.40035016781108546
n_units_l2: 376
activation2: sigmoid
dropout2: 0.29387234491389896
finalact: linear
LRfactor: 0.16202265241592126
LRpatience: 10
optimizer: adam
# Rebuild the best architecture found by Optuna (same data as the search)
# as a literal stack, then train with early stopping on validation loss.
model = Sequential()
for units, activation, drop_rate in (
    (197, tf.keras.activations.linear, 0.15),
    (193, tf.keras.activations.swish, 0.40),
    (376, tf.keras.activations.sigmoid, 0.29),
):
    model.add(keras.layers.LSTM(units,
                                input_shape=(1, data_train.shape[1] - 1),
                                return_sequences=True,
                                activation=activation))
    model.add(keras.layers.Dropout(drop_rate))
model.add(keras.layers.Dense(1, activation=tf.keras.activations.linear))

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.16, patience=10,
                              min_lr=1e-05, verbose=0)
early_stoping = EarlyStopping(monitor="val_loss", min_delta=0, patience=10,
                              verbose=0, mode="auto", restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')
val_ds = (xval_n, yval_n)
history = model.fit(xtrain_n, ytrain_n, validation_data=val_ds,
                    epochs=200, verbose=0, callbacks=[early_stoping, reduce_lr])
# Train vs validation loss curves for the tuned model.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch

loss_traces = [
    go.Scatter(x=hist['epoch'], y=hist['loss'], mode='lines', name='Train_loss'),
    go.Scatter(x=hist['epoch'], y=hist['val_loss'], mode='lines', name='Val_loss'),
]
fig = go.Figure(
    data=loss_traces,
    layout=go.Layout(title='Optuna Loss best model History',
                     xaxis={'title': 'Epoch'},
                     yaxis={'title': 'Loss'}),
)
fig.show()
fig.write_html("Optuna Loss best model.html")
# Refit the Optuna-selected architecture on train+val combined. There is
# no held-out validation here, so LR reduction and early stopping both
# monitor the training loss.
model = Sequential()
for units, activation, drop_rate in (
    (197, tf.keras.activations.linear, 0.15),
    (193, tf.keras.activations.swish, 0.40),
    (376, tf.keras.activations.sigmoid, 0.29),
):
    model.add(keras.layers.LSTM(units,
                                input_shape=(1, data_trainall.shape[1] - 1),
                                return_sequences=True,
                                activation=activation))
    model.add(keras.layers.Dropout(drop_rate))
model.add(keras.layers.Dense(1, activation=tf.keras.activations.linear))

reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.16, patience=10,
                              min_lr=1e-05, verbose=0)
early_stoping = EarlyStopping(monitor="loss", min_delta=0, patience=10,
                              verbose=0, mode="auto", restore_best_weights=True)
model.compile(optimizer='adam', loss='mse')
history = model.fit(xtrainall_n, ytrainall_n, epochs=200, verbose=1,
                    callbacks=[early_stoping, reduce_lr])
Epoch 1/200 79/79 [==============================] - 4s 11ms/step - loss: 0.1819 - lr: 0.0010 Epoch 2/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0784 - lr: 0.0010 Epoch 3/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0775 - lr: 0.0010 Epoch 4/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0680 - lr: 0.0010 Epoch 5/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0331 - lr: 0.0010 Epoch 6/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0140 - lr: 0.0010 Epoch 7/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0107 - lr: 0.0010 Epoch 8/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0100 - lr: 0.0010 Epoch 9/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0091 - lr: 0.0010 Epoch 10/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0086 - lr: 0.0010 Epoch 11/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0080 - lr: 0.0010 Epoch 12/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0078 - lr: 0.0010 Epoch 13/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0077 - lr: 0.0010 Epoch 14/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0074 - lr: 0.0010 Epoch 15/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0072 - lr: 0.0010 Epoch 16/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0071 - lr: 0.0010 Epoch 17/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0070 - lr: 0.0010 Epoch 18/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0070 - lr: 0.0010 Epoch 19/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0068 - lr: 0.0010 Epoch 20/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0072 - lr: 0.0010 Epoch 21/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0069 - lr: 0.0010 Epoch 22/200 
79/79 [==============================] - 1s 11ms/step - loss: 0.0069 - lr: 0.0010 Epoch 23/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010 Epoch 24/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0065 - lr: 0.0010 Epoch 25/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0067 - lr: 0.0010 Epoch 26/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0064 - lr: 0.0010 Epoch 27/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010 Epoch 28/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010 Epoch 29/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0065 - lr: 0.0010 Epoch 30/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010 Epoch 31/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0066 - lr: 0.0010 Epoch 32/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0063 - lr: 0.0010 Epoch 33/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0063 - lr: 0.0010 Epoch 34/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010 Epoch 35/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0064 - lr: 0.0010 Epoch 36/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0062 - lr: 0.0010 Epoch 37/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0058 - lr: 0.0010 Epoch 38/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0061 - lr: 0.0010 Epoch 39/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010 Epoch 40/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0063 - lr: 0.0010 Epoch 41/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0059 - lr: 0.0010 Epoch 42/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0060 - lr: 0.0010 Epoch 43/200 
79/79 [==============================] - 1s 10ms/step - loss: 0.0061 - lr: 0.0010 Epoch 44/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0062 - lr: 0.0010 Epoch 45/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0061 - lr: 0.0010 Epoch 46/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0058 - lr: 0.0010 Epoch 47/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0059 - lr: 0.0010 Epoch 48/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 49/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 50/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0056 - lr: 1.6000e-04 Epoch 51/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04 Epoch 52/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0056 - lr: 1.6000e-04 Epoch 53/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04 Epoch 54/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 55/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 56/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0056 - lr: 1.6000e-04 Epoch 57/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 58/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.6000e-04 Epoch 59/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.6000e-04 Epoch 60/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 61/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.6000e-04 Epoch 62/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05 Epoch 63/200 79/79 [==============================] 
- 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05 Epoch 64/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05 Epoch 65/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 2.5600e-05 Epoch 66/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05 Epoch 67/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05 Epoch 68/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 2.5600e-05 Epoch 69/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 2.5600e-05 Epoch 70/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05 Epoch 71/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 2.5600e-05 Epoch 72/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 2.5600e-05 Epoch 73/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 2.5600e-05 Epoch 74/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0055 - lr: 1.0000e-05 Epoch 75/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0052 - lr: 1.0000e-05 Epoch 76/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05 Epoch 77/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05 Epoch 78/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.0000e-05 Epoch 79/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.0000e-05 Epoch 80/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0053 - lr: 1.0000e-05 Epoch 81/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0052 - lr: 1.0000e-05 Epoch 82/200 79/79 [==============================] - 1s 10ms/step - loss: 0.0054 - lr: 1.0000e-05 Epoch 83/200 79/79 [==============================] - 1s 10ms/step - 
loss: 0.0054 - lr: 1.0000e-05 Epoch 84/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0054 - lr: 1.0000e-05 Epoch 85/200 79/79 [==============================] - 1s 11ms/step - loss: 0.0053 - lr: 1.0000e-05
# Predict scaled GSPC returns for the hold-out test window; the output is
# indexed later as prediction[i][0][0], i.e. shape (n, 1, 1).
prediction=model.predict(xtest_n)
1/1 [==============================] - 0s 352ms/step
# Idiom fix: the original built both lists with manual range(len())+append
# loops; comprehensions are clearer and equivalent.
# Flatten the (n, 1, 1) network output into a flat list of scaled values.
final_values = [step[0][0] for step in prediction]

# Inverse-transform via a zero frame: only the GSPC column carries real
# values, and after inverse scaling column 0 holds the rescaled forecast
# (the other columns are ignored).
df_final = pd.DataFrame(0, index=np.arange(len(data_test)), columns=data_test.columns)
df_final['GSPC'] = final_values
df_final = scaler_all.inverse_transform(df_final)
final_values_rescaled = [row[0] for row in df_final]
# Plot train/val/test return series plus the model forecast.
# BUG FIX: the original used `traindates` as the x-axis for data_train, but
# traindates had been filtered by the LocalOutlierFactor mask (and then
# overwritten with train-all dates), so it no longer aligned with the
# unfiltered data_train rows. Each frame's own index is always aligned.
trace1 = go.Scatter(
    x=pd.to_datetime(data_train.index), y=data_train.iloc[:, 0],
    mode='lines', name='Datatrain'
)
trace2 = go.Scatter(
    x=pd.to_datetime(data_test.index), y=data_test.iloc[:, 0],
    mode='lines', name='Datatest'
)
trace3 = go.Scatter(
    x=pd.to_datetime(data_val.index), y=data_val.iloc[:, 0],
    mode='lines', name='Dataval'
)
trace4 = go.Scatter(
    x=pd.to_datetime(data_test.index), y=final_values_rescaled,
    mode='lines', name='Prediction'
)
layout = go.Layout(
    title='S&P 500 Forecast',
    xaxis={'title': 'Date'},
    yaxis={'title': 'Close'}
)
fig = go.Figure(data=[trace1, trace2, trace3, trace4], layout=layout)
fig.show()
fig.write_html("SP500.html")
# Goodness of fit on the original (unscaled) test return series.
from sklearn.metrics import r2_score,mean_squared_error
import math
r2_score(data_test.iloc[:,0], final_values_rescaled)
0.8381477171656116
mse = mean_squared_error(data_test.iloc[:,0], final_values_rescaled)
rmse = math.sqrt(mse)
rmse
0.41265596520482867
# Scatter of actual vs predicted returns; points near the diagonal
# indicate a good fit.
plt.plot(data_test.iloc[:,0], final_values_rescaled, 'ro')
plt.show()
# Persist the model: architecture as JSON, weights as HDF5.
model_json = model.to_json()
with open("modelRendSP500.json", "w") as json_file:
    json_file.write(model_json)
model.save_weights("modelRendSP500.h5")
# Save the fitted train+val scaler so future predictions can be
# inverse-transformed with identical parameters.
import joblib
joblib.dump(scaler_all, 'scalerRendSP500.gz')
['scalerRendSP500.gz']